In this first chunk, let’s read in the Census microdata. Here is some example code on how to read in the data, create new variables to categorize the rows of data into groups, and then summarize the data to create information about Louisville.
Our goal is to create variables for gender, age group, whether someone is a mother, whether someone is married, their level of education, their income, whether they are the head of household, and the number of children they have.
This code chunk will identify which households are homeowners vs. renters (in the homeownership variable) and which households are cost-burdened, meaning they pay more thatn 30% of their income toward rent or a mortgage (in the cost_burden variable).
There are also variables for severe cost burden (households that pay more than half of their income towards housing) and households with severe housing problems (lacking a kitchen, adequate plumbing, or an ample number of rooms for the number of people living there).
load("clean_svybydemog_data.RData")
#Waffle Chart
temp_df <- H_earntype %>%
filter(race == 'total',
var_type == "percent", sex == "total") %>%
pivot_wider(names_from = "male_fem_mult_earn", values_from = "homeownership")
trend(temp_df,
multiple_earner:single_male_earner,
plot_title = "Homeownership by Year",
cat = c("Multiple Earners" = "multiple_earner", "Single Female" = "single_fem_earner", "Single Male" = "single_male_earner"),
pctiles = F,
y_title = 'Percent',
rollmean = 3,
caption_text =
"Source: Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
# H_singFem_rank <- census_microdata081122 %>%
# filter(
# year == '2019',
# earner_type == 'single_earner') %>%
# survey_by_demog('homeownership', weight_var = "HHWT") %>%
# filter(
# sex == 'total',
# race == 'total',
# var_type == 'percent')
temp_df <- H_earntype %>%
filter(male_fem_mult_earn == "single_fem_earner",
var_type == "percent", sex == "total") %>%
mutate(sex = "total")
ranking(temp_df,
'homeownership',
plot_title = "Single Earner Female Homeownership",
caption_text =
"Source: Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
# H_s_Femkids_trend <- census_microdata081122 %>%
# filter(earner_type == 'single_earner') %>%
# survey_by_demog( weight_var = "HHWT", 'homeownership', other_grouping_vars = c("kd_pres"))
H_s_Femkids_trend %<>%
filter(
var_type == 'percent',
race == 'total',
sex == "female") %>%
pivot_wider(names_from = 'kd_pres', values_from = 'homeownership') %>%
select(-sex)
trend(H_s_Femkids_trend,
kids:no_kids,
rollmean = 3,
plot_title = "Female Homeownership by Presence of Children",
cat = c("Children" = "kids", "No Children" = "no_kids"),
y_title = 'Percent',
caption_text =
"Source: Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
# H_sinFem_kids <-census_microdata081122 %>%
# filter(
# year == '2019',
# earner_type == 'single_earner',
# NCHILD > 0) %>%
# survey_by_demog('homeownership', weight_var = "HHWT") %>%
# filter(
# sex == 'total',
# race == 'total',
# var_type == 'percent')
ranking(H_sinFem_kids, 'homeownership',
plot_title = "Single Earner Female Homeownership with Children",
title_scale = 0.8,
caption_text =
"Source: Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
#H_Fem_race <- survey_by_demog(census_microdata081122 ,weight_var = "HHWT", 'homeownership')
# H_Fem_race <- census_microdata081122 %>%
# filter(sex == 'female') %>%
# survey_by_demog( weight_var = "HHWT", 'homeownership')
temp_df <- H_earntype %>%
filter(male_fem_mult_earn == "single_fem_earner",
var_type == "percent", sex == "total")
trend(filter(temp_df, race != "hispanic"),
homeownership,
rollmean = 3,
pctiles = F,
plot_title = "Single Female Homeownership by Year",
cat = 'race',
y_title = 'Percent',
caption_text =
"Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
#################KEEP################
## create hist for each of the most three recent years -> just for verification...that there isnt anything weird
## if feeling funky, use gganimate to create a gif of histograms from 2000 to 2019
#TO DO:
# Change the x-axis to a dollar format (see trendline function in trendline_helpers.R) - done
# Label the x-axis on every $50,000? - done
# Let's use counts on the y-axis for the male and female graph - done?
# Format the y-axis with commas - done
# remove legend? - done
#p$HHINCOME %>% dollar(accuracy = 0.1, scale = .001, suffix = "k")
single_earner_pctiles <- lville_2019 %>%
group_by(sex) %>%
summarize(
ten_pct = Hmisc::wtd.quantile(HHINCOME, HHWT, probs = 0.1),
twenty_five_pct = Hmisc::wtd.quantile(HHINCOME, HHWT, probs = 0.25),
fifty_pct = Hmisc::wtd.quantile(HHINCOME, HHWT, probs = 0.5),
seventy_five_pct = Hmisc::wtd.quantile(HHINCOME, HHWT, probs = 0.75),
ninety_pct = Hmisc::wtd.quantile(HHINCOME, HHWT, probs = 0.9))
library(gt)
gt(single_earner_pctiles)
| sex | ten_pct | twenty_five_pct | fifty_pct | seventy_five_pct | ninety_pct |
|---|---|---|---|---|---|
| female | 11200 | 25000 | 50000 | 91000 | 152000 |
| male | 18000 | 37000 | 66900 | 108400 | 170000 |
p <- lville_2019 %>%
filter(HHINCOME <= cut_95,
earner_type == "single_earner") %>%
func_plt_hist_overlay( "sex")
p <- p + glp_graph_theme
p <- p + labs(
title = "Single Earner Income by Gender",
) +
ylab(" ") +
guides(color = FALSE) +
facet_wrap(~sex, nrow = 2) +
theme(
#axis.ticks.x = element_line(size = 50000),
strip.text = element_blank()
) +
scale_x_continuous(
breaks = c(50000, 100000, 150000, 200000),
label = c("$50k", "$100k", "$150k", "$200k")
) +
scale_y_continuous(labels = scales::comma)
p
# Need help turning yaxis into percents
sing_fem_inc_race <- lville_2019 %>%
filter(
sex == 'female',
earner_type == 'single_earner',
HHINCOME <= cut_95) %>%
func_plt_hist_overlay( "race")
sing_fem_inc_race <- sing_fem_inc_race + facet_wrap(~race, nrow = 2, scales = "free_y")
sing_fem_inc_race <- sing_fem_inc_race + glp_graph_theme
sing_fem_inc_race <- sing_fem_inc_race +
labs(
title = "Single Female Earner Income",
) +
ylab(" ") +
guides(color = FALSE)
sing_fem_inc_race <- sing_fem_inc_race +
theme(
#axis.ticks.x = element_line(size = 50000),
strip.text = element_blank()
) +
scale_x_continuous(
breaks = c(50000, 100000, 150000),
label = c("$50k", "$100k", "$150k")
) +
scale_y_continuous(labels = scales::comma)
sing_fem_inc_race
# TO DO
# see above for x and y axis to-dos
# Drop the legend title (see )
cost_burden_sf <- lville_2019 %>%
filter(
sex == 'female',
earner_type == 'single_earner',
HHINCOME <= cut_95) %>%
mutate(
cost_burden = factor(cost_burden,
levels = rev(c(TRUE, FALSE)),
labels = rev(c("Cost Burdened", "Non Cost Burdened")),
ordered = TRUE))
cost_burden_sf_plot <- ggplot(cost_burden_sf,
aes(x = HHINCOME, fill = cost_burden, weights = HHWT),
alpha=0.5,
position = "stack",
binwidth = 10000) +
geom_histogram()
cost_burden_sf_plot <- cost_burden_sf_plot + glp_graph_theme
cost_burden_sf_plot <- cost_burden_sf_plot +
labs(
title = "Single Female Earner Cost Burden",
) +
ylab(" ") +
xlab("Household Income") +
guides(color = FALSE) +
theme(
#axis.ticks.x = element_line(size = 50000),
strip.text = element_blank()
) +
scale_fill_manual(values = c("#0E4A99", "#F58021")) +
scale_x_continuous(
breaks = c(50000, 100000, 150000, 200000),
label = c("$50k", "$100k", "$150k", "$200k")
) +
scale_y_continuous(labels = scales::comma)
cost_burden_sf_plot
#I_CB_earn_trend <- survey_by_demog(census_microdata081122, weight_var = "HHWT", 'cost_burden', other_grouping_vars = c('earner_type'), breakdowns = "sex")
# I_CB_earn_trend <- survey_by_demog(census_microdata081122, weight_var = "HHWT", 'cost_burden', other_grouping_vars = c('male_fem_mult_earn'))
I_CB_earn_trend %<>%
filter(
var_type == 'percent',
race == 'total',
sex == 'total') %>%
select( -c(sex,race)) %>%
pivot_wider(names_from = "male_fem_mult_earn", values_from = "cost_burden")
trend(I_CB_earn_trend,
multiple_earner:single_fem_earner:single_male_earner,
pctiles = F,
plot_title = "Cost Burden by Earner Type",
cat = c("Multiple Earners" = "multiple_earner", "Single Female Earner" = "single_fem_earner", "Single Male Earner" = "single_male_earner"),
y_title = 'Percent',
caption_text =
"Source Greater Louisville Project
Data from GLP analysis of ACS microdata from IPUMS-USA")
#will need to see if x-axis label knit correctly on Harrison's
#having issues switching to glp colors & renaming legend values
#chart_possible_colors <- c("#0E4A99", "#F58021", "#00A9B7", "#800055", "#356E39", "#CFB94C", "#7E9C80")
# chart_number_I_need = length(unique(data$group))
# chart_these_colors <- possible_colors[1:number_I_need]
I_median_earn_age <- lville_2019 %>%
group_by(age_group, male_fem_mult_earn) %>%
summarize(Med=median(HHINCOME))
I_median_earn_age_plot <- ggplot(I_median_earn_age,
aes(x=age_group, y=Med, fill = male_fem_mult_earn)) +
geom_bar(stat="identity", position='dodge')
I_median_earn_age_plot <- I_median_earn_age_plot + glp_graph_theme
I_median_earn_age_plot <- I_median_earn_age_plot +
labs(
title = "Median Earnings by Earning Type",
) +
ylab("Household Income") +
xlab("Age Group") +
scale_y_continuous(labels = scales::dollar) +
scale_fill_manual(
values = c("#0E4A99", "#F58021", "#00A9B7"),
labels = c("Multiple Earner", "Single Female Earner", "Single Male Earner"))
I_median_earn_age_plot
#issues displaying the percentages of each group
E_singM_singF <- lville_2019 %>%
filter(earner_type == 'single_earner') %>%
group_by(sex, educ) %>%
summarize(n=sum(HHWT, na.rm = TRUE)) %>%
mutate(
total = sum(n),
rate = n/sum(n)*100,
educ = factor(educ,
levels = rev(c("no_hs", "hs", "some_col", "assoc", "bach","grad")),
ordered = TRUE))
E_singM_singF_plot <- ggplot(E_singM_singF,
aes(x=sex, y=rate, fill = educ)) +
geom_bar(stat="identity", position = "fill")
# geom_col(aes(fill = educ)) +
# geom_text(aes(label = rate, position = position_stack(vjust = 0.5))
#)
E_singM_singF_plot <- E_singM_singF_plot + glp_graph_theme
E_singM_singF_plot <- E_singM_singF_plot +
theme(
legend.position = "right"
) +
labs(
title = "Educational attainment by gender for single earners",
) +
ylab(" ") +
xlab(" ") +
scale_fill_discrete(labels = c("Graduate","Bachelor", "Associate", "Some College", "High School", "No High School")) +
scale_x_discrete (labels = c("female" = "Female", "male" = "Male")) +
scale_y_continuous(labels = scales::percent)
E_singM_singF_plot
#show percentages in bars?
#how does it look when harrison knits it?
E_singF_race <- lville_2019 %>%
filter(
sex == 'female',
earner_type == 'single_earner') %>%
group_by(race, educ) %>%
summarize(n=sum(HHWT, na.rm = TRUE)) %>%
mutate(
total = sum(n),
rate = n/sum(n)*100,
educ = factor(educ,
levels = rev(c("no_hs", "hs", "some_col", "assoc", "bach","grad")),
ordered = TRUE))
E_singF_race_plot <- ggplot(E_singF_race, aes(x=race, y=rate, fill=educ)) +
geom_bar(stat="identity", position='fill')
E_singF_race_plot <- E_singF_race_plot + glp_graph_theme
E_singF_race_plot <- E_singF_race_plot +
theme(
legend.position = "right"
) +
labs(
title = "Single Female Education Breakdown",
) +
ylab(" ") +
xlab("Race") +
scale_fill_discrete(labels = c("Graduate","Bachelor", "Associate", "Some College", "High School", "No High School")) +
scale_x_discrete (labels = c("female" = "Female", "male" = "Male")) +
scale_y_continuous(labels = scales::percent)
E_singF_race_plot
#will xaxis look normal if knitted from harrison's computer?
# cost_burden_age_sf <- census_microdata081122 %>%
# filter(year %in% 2010:2019) %>%
# mutate(
# cost_burden = factor(cost_burden,
# levels = rev(c(TRUE, FALSE)),
# labels = rev(c("Cost Burdened", "Non Cost Burdened")),
# ordered = TRUE)
# )
cost_burden_age_sf %<>% drop_na(cost_burden) #this will need to be run once and then left alone if tweaking graphs
cost_burden_age_sf_plot <- ggplot(cost_burden_age_sf,
aes(x=age_group, y=HHWT , fill=cost_burden),
color="#00A9B7") +
geom_bar(stat="identity", position='fill')
cost_burden_age_sf_plot <- cost_burden_age_sf_plot + glp_graph_theme
cost_burden_age_sf_plot <- cost_burden_age_sf_plot +
theme(
legend.position = "right"
) +
labs(
title = "Cost Burdened Status by Age",
) +
ylab(" ") +
xlab("Race") +
scale_fill_discrete(labels = c("Non Cost Burdened", "Cost Burdened")) +
#scale_x_discrete (labels = c("female" = "Female", "male" = "Male")) +
scale_y_continuous(labels = scales::percent)
cost_burden_age_sf_plot
#x-axis...not legible on Josh's comp
temp_df <- cost_burden_age_sf %>%
mutate(
age_group = case_when(
age %in% 15:19 ~ NA_character_,
age %in% 20:29 ~ "20-29",
age %in% 30:39 ~ "30-39",
age %in% 40:49 ~ "40-49",
age %in% 50:59 ~ "50-59",
age %in% 60:69 ~ "60-69",
age %in% 70:79 ~ "70-79",
age >= 80 ~ "80+"))
cost_burden_age_sf_facet_plt <- ggplot(temp_df,
aes(x=age_group, y=HHWT , fill=cost_burden),
color="#00A9B7") +
geom_bar(stat="identity", position='fill')+
facet_wrap(~male_fem_mult_earn)
cost_burden_age_sf_facet_plt <- cost_burden_age_sf_facet_plt + glp_graph_theme
cost_burden_age_sf_facet_plt <- cost_burden_age_sf_facet_plt +
theme(
legend.position = "right",
strip.text = element_text(size = 40)
) +
labs(
title = "Cost Burdened Status by Age and Earner Type",
) +
ylab(" ") +
xlab(" ") +
scale_fill_discrete(labels = c("Non Cost Burdened", "Cost Burdened")) +
scale_x_discrete(guide = guide_axis(n.dodge=2)) +
scale_y_continuous(labels = scales::percent)
cost_burden_age_sf_facet_plt
#does this specficially need to be for louisville?
#once data is decided, add this data frame to the saved data frame chunk at beginning of rmd file
#x-axis is crowded
#will knit look better from Harrison's
earner_trend <- census_microdata081122 %>%
mutate(
male_fem_mult_earn = case_when(
sex == 'female' & earner_type == 'single_earner' ~ 'single_fem_earner',
sex == 'male' & earner_type == 'single_earner' ~ 'single_male_earner',
earner_type == 'multi_earner' ~ 'multiple_earner')
) %>%
group_by(year, male_fem_mult_earn) %>%
summarize(n=sum(HHWT, na.rm = TRUE)) %>%
mutate(
total = sum(n),
rate = n/sum(n)*100)
earner_trend_plt <- ggplot(earner_trend,
aes(x=year, y=rate, fill=male_fem_mult_earn),
color="#00A9B7") +
geom_bar(stat="identity", position='fill')
earner_trend_plt <- earner_trend_plt + glp_graph_theme
earner_trend_plt <- earner_trend_plt +
theme(
legend.position = "right"
#strip.text = element_blank()
) +
labs(
title = "Earner Type Trend"
) +
ylab(" ") +
xlab(" ") +
scale_fill_discrete(labels = c("Multiple Earner", "Single Female Earner", "Single Male Earner")) +
scale_y_continuous(labels = scales::percent)
earner_trend_plt